import math

import requests
from lxml import etree
import re
import time

# Request headers sent with every page fetch; only the user-agent is set.
# The value is split across adjacent literals purely for readability —
# Python concatenates them into the same single string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0"
    ),
}
# Cookies sent with every listing-page request (passed as the `cookies=`
# argument of the HTTP call in the crawl loop).
# NOTE(review): these look like values captured from a live browser session
# (session/uuid identifiers, analytics counters containing Unix timestamps);
# presumably they expire and must be refreshed by hand — confirm.
# NOTE(review): committing session cookies to source control is risky; consider
# loading them from an environment variable or an untracked file instead.
cookies = {
    "SECKEY_ABVK": "eYr8zSvtHQxCqdyuVDFeghtryt+pSdAsJPvo2w+t5Yk%3D",
    "BMAP_SECKEY": "eb7bYvvoqmXADX4zL7-M3DJvhnZVsHDpCBWnxAFfy-MxRQeCIYymz5uWKDuZlwjDXuByVLaYH7jJBRb4fgGmILjMfQbT_9FDQPNKVjXxtCcHfF5c1Tk2HcJ4olHv-BFnm1eqaikZxT3XQ1u9IEcmhqOPeEKNEzHfhieKYGkszl2gqFBQjqdJ_cnhp09pQwQQ",
    "lianjia_ssid": "3ebb3d13-54ab-4111-b37a-59d45da78189",
    "lianjia_uuid": "37a2af8b-a736-494a-807d-a25fe3acd446",
    "select_city": "330100",
    "GUARANTEE_POPUP_SHOW": "true",
    "GUARANTEE_BANNER_SHOW": "true",
    "Hm_lvt_46bf127ac9b856df503ec2dbf942b67e": "1750421151",
    "HMACCOUNT": "A8EA38DBEA1C6BFA",
    "_jzqc": "1",
    "_jzqckmp": "1",
    "_qzjc": "1",
    "sajssdk_2015_cross_new_user": "1",
    "sensorsdata2015jssdkcross": "%7B%22distinct_id%22%3A%221978d3b1ee4581-00708466dcee76-4c657b58-2073600-1978d3b1ee529ac%22%2C%22%24device_id%22%3A%221978d3b1ee4581-00708466dcee76-4c657b58-2073600-1978d3b1ee529ac%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D",
    "hip": "lMGeUf694y9hhq-uNdl-yyw26GytSXOh5ujVSxipccORhR_eGa6cDKt9rG06sCo1jnIcGyjsvHf9p98U09NhDCkQTnZOEDh6vkBZ8ICfVdFwoQBKMQUoiZYKbRlLU6l-OqFmP03sIF62zx7Qdk2SD5tzyRDPu7eKo34oW1RoVocXGIpoVcVDxyhl0A%3D%3D",
    "beikeBaseData": "%7B%22parentSceneId%22:%22990858386831298817%22%7D",
    "Hm_lpvt_46bf127ac9b856df503ec2dbf942b67e": "1750425912",
    "_qzja": "1.198925986.1750421151117.1750421151117.1750425912406.1750421151117.1750425912406.0.0.0.2.2",
    "_qzjto": "2.2.0",
    "_jzqa": "1.1076441113807514900.1750421151.1750421151.1750425912.2",
    "_jzqx": "1.1750425912.1750425912.1.jzqsr=hz%2Elianjia%2Ecom|jzqct=/zufang/.-",
    "_jzqb": "1.1.10.1750425912.1",
    "_qzjb": "1.1750425912406.1.0.0.0",
    "srcid": "eyJ0Ijoie1wiZGF0YVwiOlwiZjQzYzM2NmQzNzkxNmE4ODc3ZDkxZDNjNWUxNDcwMWNiNWJjYmU0NjM5OGJiNTgyMjc5MDkxMDNlZWY2OTQ5NjM1YWIwOTdmMmJkMDllYmNhOWE2YjY1OTE3N2NiNWE1OTVhZWZiNTQzZDViNjIyYzI2Yjg4ZjNiN2Q1ZjBhMzNhNWY3ZWYxNGU3ZGJlOGE3NzIwMWEzOTM5NTcyOGJiODgwOTIxM2UyYmE3YWNjZGU5ZTU3YzE0MDBkZjU3ZDIyMmJjODA2ZmIxZDlmZDEyY2ZhYTViNzBmZjM0OWI3MjdkYTE0YTRhNGM5NmZkZGVlMWQwYmRiMWU3MmI2OWU0ZlwiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCIxZjA1NGI5YVwifSIsInIiOiJodHRwczovL2h6LmxpYW5qaWEuY29tL3p1ZmFuZy9mdXlhbmdxdS8iLCJvcyI6IndlYiIsInYiOiIwLjEifQ=="
}
# Crawl the paginated rental listings on hz.lianjia.com and print, for every
# listing found, its title, area, monthly price and detail-page link.

# NOTE(review): 103697 is presumably the total listing count shown by the site
# when this was written and 30 the number of listings per page — confirm.
TOTAL_LISTINGS = 103697
LISTINGS_PER_PAGE = 30
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever
REQUEST_DELAY = 1  # seconds to pause between consecutive page requests
BASE_URL = "https://hz.lianjia.com"

# Loop invariants, computed once instead of on every iteration.
total_pages = math.ceil(TOTAL_LISTINGS / LISTINGS_PER_PAGE)
area_pattern = re.compile(r'\d+\.\d+|\d+')

for page in range(1, total_pages + 1):
    url = f"{BASE_URL}/zufang/pg{page}"
    try:
        response = requests.get(
            url,
            headers=headers,
            cookies=cookies,
            timeout=REQUEST_TIMEOUT,
        )
        # Reject 4xx/5xx answers instead of parsing an error page as if it
        # were a (seemingly empty) listing page.
        response.raise_for_status()
    except requests.RequestException as exc:
        # One bad page should not abort the whole crawl: report and move on.
        print(f"第{page}页请求失败: {exc}")
        time.sleep(REQUEST_DELAY)
        continue

    html_str = response.content.decode()
    root = etree.HTML(html_str)
    # etree.HTML may yield None for an empty/unparseable body; treat that as
    # a page without listings rather than failing on `.xpath`.
    if root is None:
        home_list = []
    else:
        home_list = root.xpath("//div[@class='content__list--item--main']")

    print(f"**********第{page}页开始**********")
    for home in home_list:
        # Title and link live on the sibling <a> element, hence the `../`.
        title = "".join(home.xpath("../a[@class='content__list--item--aside']/@title")).strip()

        # The area is taken as the first integer or decimal number that
        # appears in the description paragraph's own text nodes.
        data = "".join(home.xpath(".//p[@class='content__list--item--des']/text()")).replace("\n", "")
        match = area_pattern.search(data)
        home_area = (match.group(0) if match else "") + "㎡"

        price = "".join(home.xpath(".//span[@class='content__list--item-price']/em/text()")) + "元/月"

        home_link = BASE_URL + "".join(home.xpath("../a[@class='content__list--item--aside']/@href"))
        print(f"标题:{title}\n面积:{home_area}\n价格:{price}\n链接:{home_link}")

    print(f"__________第{page}页结束__________")
    print(f"共{len(home_list)}条数据")
    # Throttle requests so the crawl does not hit the site back-to-back.
    time.sleep(REQUEST_DELAY)
